Now we run this a second time, on the second (b) feature table that has removed all epithets with fewer than 27 representative documents. The results are better (overall F1 score for decision tree is 0.44, random forest is 0.47; in a these were 0.33 and 0.40, respectively).


In [1]:
import os
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier


/root/venv/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [2]:
from sklearn import clone
from sklearn import preprocessing
from sklearn import svm
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

import datetime as dt

In [3]:
fp_df = os.path.expanduser('~/cltk_data/user_data/tlg_bow_df.pickle')
dataframe_bow = joblib.load(fp_df)

In [4]:
Y = dataframe_bow['epithet']

In [5]:
X = dataframe_bow.drop(['epithet', 'id', 'author'], 1)

In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)

In [7]:
def scale_data(X_train, X_test, Y_train, Y_test):
    """Take Vectors, 
    """

    '''
    -PREPOCESSING 
    -Here, scaled data has zero mean and unit varience
    -We save the scaler to later use with testing/prediction data
    '''
    print('Scaling data ...')
    t0 = dt.datetime.utcnow()
    scaler = preprocessing.StandardScaler().fit(X_train)
    fp_scaler = os.path.expanduser('~/cltk_data/user_data/tlg_bow_scaler.pickle')
    joblib.dump(scaler, fp_scaler)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    print('... finished in {} secs.'.format(dt.datetime.utcnow() - t0))
    print()

    return X_train_scaled, X_test_scaled, Y_train, Y_test

In [8]:
X_train_scaled, X_test_scaled, Y_train, Y_test = scale_data(X_train, X_test, Y_train, Y_test)


Scaling data ...
... finished in 0:00:02.693181 secs.

Decision tree


In [9]:
def run_tree(X_train_scaled, X_test_scaled, Y_train, Y_test):
    """Run decision tree with scikit.
    
    Experiment with: 'max_depth'
    """
    '''
    -This is where we define the models with pre-defined parameters
    -We can learn these parameters given our data
    '''
    print('Defining and fitting models ...')
    t0 = dt.datetime.utcnow()
    dec_tree = DecisionTreeClassifier()

    dec_tree.fit(X_train_scaled, Y_train)

    fp_model_pickle = os.path.expanduser('~/cltk_data/user_data/tlg_bow_dt.pickle')
    joblib.dump(dec_tree, fp_model_pickle)

    print('... finished in {} secs.'.format(dt.datetime.utcnow() - t0))
    print()
    

    Y_prediction_tree = dec_tree.predict(X_test_scaled)
    print('tree_predictions ', Y_prediction_tree)

    expected = Y_test
    print('actual_values   ', expected)


    print()
    print('----Tree_report--------------------------------')
    print(classification_report(expected, Y_prediction_tree))

In [10]:
run_tree(X_train_scaled, X_test_scaled, Y_train, Y_test)


Defining and fitting models ...
... finished in 0:00:26.187486 secs.

tree_predictions  ['Grammatici' 'Scriptores Ecclesiastici' 'Comici' 'Philosophici/-ae'
 'Comici' 'Theologici' 'Philosophici/-ae' 'Tragici' 'Lyrici/-ae'
 'Scriptores Ecclesiastici' 'Historici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Comici' 'Historici/-ae'
 'Historici/-ae' 'Tragici' 'Tragici' 'Theologici' 'Philosophici/-ae'
 'Tragici' 'Scriptores Ecclesiastici' 'Tragici' 'Philosophici/-ae'
 'Tragici' 'Historici/-ae' 'Philosophici/-ae' 'Tragici' 'Tragici' 'Comici'
 'Historici/-ae' 'Scriptores Ecclesiastici' 'Comici' 'Grammatici'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Grammatici'
 'Historici/-ae' 'Comici' 'Philosophici/-ae' 'Tragici' 'Philosophici/-ae'
 'Philosophici/-ae' 'Tragici' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Scriptores Ecclesiastici' 'Lyrici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Philosophici/-ae' 'Epici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Rhetorici'
 'Philosophici/-ae' 'Comici' 'Comici' 'Philosophici/-ae' 'Tragici' 'Comici'
 'Sophistae' 'Philosophici/-ae' 'Tragici' 'Historici/-ae' 'Grammatici'
 'Comici' 'Historici/-ae' 'Comici' 'Historici/-ae' 'Historici/-ae'
 'Tragici' 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Tragici'
 'Sophistae' 'Philosophici/-ae' 'Tragici' 'Scriptores Ecclesiastici'
 'Rhetorici' 'Rhetorici' 'Philosophici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Tragici' 'Lyrici/-ae' 'Tragici' 'Historici/-ae'
 'Comici' 'Scriptores Ecclesiastici' 'Alchemistae' 'Historici/-ae'
 'Epici/-ae' 'Theologici' 'Historici/-ae' 'Historici/-ae' 'Tragici'
 'Historici/-ae' 'Philosophici/-ae' 'Comici' 'Comici' 'Tragici' 'Tragici'
 'Medici' 'Medici' 'Comici' 'Tragici' 'Historici/-ae' 'Tragici' 'Sophistae'
 'Medici' 'Tragici' 'Philosophici/-ae' 'Philosophici/-ae' 'Tragici'
 'Historici/-ae' 'Rhetorici' 'Tragici' 'Historici/-ae' 'Philosophici/-ae'
 'Rhetorici' 'Historici/-ae' 'Scriptores Ecclesiastici' 'Comici' 'Comici'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Scriptores Ecclesiastici' 'Tragici' 'Comici' 'Historici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Medici' 'Historici/-ae' 'Rhetorici'
 'Historici/-ae' 'Historici/-ae' 'Elegiaci' 'Philosophici/-ae'
 'Historici/-ae' 'Grammatici' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Comici' 'Historici/-ae' 'Historici/-ae'
 'Tragici' 'Tragici' 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Medici' 'Historici/-ae' 'Elegiaci' 'Historici/-ae' 'Comici' 'Tragici'
 'Historici/-ae' 'Philosophici/-ae' 'Comici' 'Elegiaci' 'Comici' 'Comici'
 'Historici/-ae' 'Tragici' 'Tragici' 'Tragici' 'Historici/-ae' 'Grammatici'
 'Rhetorici' 'Medici' 'Comici' 'Tragici' 'Historici/-ae' 'Epici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Comici'
 'Philosophici/-ae' 'Medici' 'Historici/-ae' 'Philosophici/-ae' 'Comici'
 'Tragici' 'Poetae' 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Tragici' 'Philosophici/-ae' 'Tragici' 'Tragici' 'Historici/-ae' 'Tragici'
 'Historici/-ae' 'Grammatici' 'Tragici' 'Scriptores Ecclesiastici'
 'Epici/-ae' 'Historici/-ae' 'Historici/-ae' 'Tragici' 'Historici/-ae'
 'Historici/-ae' 'Tragici' 'Tragici' 'Philosophici/-ae' 'Tragici'
 'Historici/-ae' 'Comici' 'Historici/-ae' 'Historici/-ae'
 'Scriptores Ecclesiastici' 'Philosophici/-ae' 'Scriptores Ecclesiastici'
 'Comici' 'Grammatici' 'Lyrici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Sophistae'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Tragici' 'Theologici'
 'Historici/-ae' 'Tragici' 'Epici/-ae' 'Rhetorici' 'Historici/-ae' 'Comici'
 'Tragici' 'Historici/-ae' 'Grammatici' 'Tragici' 'Grammatici'
 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Lyrici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Comici' 'Comici' 'Comici' 'Historici/-ae' 'Tragici' 'Comici'
 'Historici/-ae' 'Tragici' 'Comici' 'Historici/-ae' 'Tragici'
 'Philosophici/-ae' 'Medici' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Grammatici' 'Philosophici/-ae' 'Philosophici/-ae'
 'Comici' 'Historici/-ae' 'Tragici' 'Tragici' 'Historici/-ae' 'Tragici'
 'Historici/-ae' 'Tragici' 'Tragici' 'Tragici' 'Comici' 'Historici/-ae'
 'Comici' 'Comici' 'Historici/-ae' 'Historici/-ae' 'Tragici'
 'Historici/-ae' 'Tragici' 'Philosophici/-ae']
actual_values    7               Philosophici/-ae
1108    Scriptores Ecclesiastici
67                        Comici
1233            Philosophici/-ae
238                       Comici
23      Scriptores Ecclesiastici
1228                   Rhetorici
514             Philosophici/-ae
467                   Lyrici/-ae
470                   Theologici
1291               Historici/-ae
1399                    Elegiaci
1163                     Tragici
1505                      Comici
1370                 Alchemistae
657                    Epici/-ae
314             Philosophici/-ae
1461                      Comici
104                      Tragici
695                Historici/-ae
599             Philosophici/-ae
161                       Comici
949                   Theologici
753                       Comici
1460                      Medici
267                       Comici
1586                      Comici
1570                 Alchemistae
929                      Tragici
811                       Comici
                  ...           
1199               Historici/-ae
503                   Lyrici/-ae
871                    Rhetorici
673                       Medici
100                       Comici
593                Historici/-ae
1005               Historici/-ae
952             Philosophici/-ae
1077    Scriptores Ecclesiastici
622             Philosophici/-ae
1113                      Comici
944                    Epici/-ae
139                      Tragici
1568                    Elegiaci
316                Historici/-ae
512                       Comici
342                Historici/-ae
1419               Historici/-ae
351                       Comici
1400                      Comici
688                       Comici
159                    Rhetorici
1339                      Comici
940                       Comici
1253               Historici/-ae
1338                   Epici/-ae
1492                     Tragici
981                Historici/-ae
1136                     Tragici
1094                 Alchemistae
Name: epithet, dtype: object

----Tree_report--------------------------------
                          precision    recall  f1-score   support

             Alchemistae       1.00      0.14      0.25         7
                  Comici       0.68      0.51      0.58        51
                Elegiaci       0.00      0.00      0.00         8
               Epici/-ae       0.20      0.05      0.08        19
              Grammatici       0.45      0.25      0.32        20
           Historici/-ae       0.60      0.75      0.67        84
              Lyrici/-ae       0.40      0.17      0.24        12
                  Medici       0.25      0.29      0.27         7
        Philosophici/-ae       0.47      0.52      0.49        46
                  Poetae       0.00      0.00      0.00         7
               Rhetorici       0.25      0.17      0.20        12
Scriptores Ecclesiastici       0.67      0.62      0.64        13
               Sophistae       0.00      0.00      0.00         8
              Theologici       0.25      0.20      0.22         5
                 Tragici       0.19      0.73      0.30        15

             avg / total       0.47      0.46      0.44       314

Random forest


In [12]:
def run_random_forest(X_train_scaled, X_test_scaled, Y_train, Y_test):
    """Scikit random forest
    
    Experiment with 'n_estimators'
    """
    t0 = dt.datetime.utcnow()

    n_estimators = 30
    
    rf_model = RandomForestClassifier(n_estimators=n_estimators)

    # Train
    clf = clone(rf_model)
    clf = rf_model.fit(X_train_scaled, Y_train)
    
    #joblib.dump(clf, 'models/random_forest.pickle')

    fp_model_pickle = os.path.expanduser('~/cltk_data/user_data/tlg_bow_fandom_forest.pickle')
    joblib.dump(clf, fp_model_pickle)
    
    scores = clf.score(X_train_scaled, Y_train)
    
    print('... finished in {} secs.'.format(dt.datetime.utcnow() - t0))
    print()
    
    Y_prediction = clf.predict(X_test_scaled)
    print('tree_predictions ', Y_prediction)

    expected = Y_test
    print('actual_values   ', expected)


    print()
    print('----Random forest report--------------------------------')
    print(classification_report(expected, Y_prediction))

In [13]:
run_random_forest(X_train_scaled, X_test_scaled, Y_train, Y_test)


... finished in 0:00:11.173582 secs.

tree_predictions  ['Philosophici/-ae' 'Historici/-ae' 'Comici' 'Philosophici/-ae' 'Comici'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Comici'
 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Tragici' 'Tragici'
 'Scriptores Ecclesiastici' 'Tragici' 'Tragici' 'Scriptores Ecclesiastici'
 'Tragici' 'Philosophici/-ae' 'Tragici' 'Comici' 'Philosophici/-ae'
 'Tragici' 'Tragici' 'Historici/-ae' 'Historici/-ae'
 'Scriptores Ecclesiastici' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Grammatici' 'Historici/-ae' 'Comici'
 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Philosophici/-ae' 'Tragici'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae'
 'Scriptores Ecclesiastici' 'Historici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Scriptores Ecclesiastici' 'Epici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Tragici' 'Comici' 'Sophistae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Grammatici' 'Historici/-ae' 'Tragici' 'Comici' 'Tragici'
 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Comici' 'Grammatici' 'Philosophici/-ae' 'Tragici'
 'Philosophici/-ae' 'Philosophici/-ae' 'Philosophici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Tragici' 'Philosophici/-ae' 'Tragici'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Epici/-ae' 'Scriptores Ecclesiastici' 'Historici/-ae'
 'Tragici' 'Tragici' 'Philosophici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Tragici' 'Tragici' 'Rhetorici'
 'Grammatici' 'Comici' 'Tragici' 'Historici/-ae' 'Tragici' 'Historici/-ae'
 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Historici/-ae' 'Tragici'
 'Historici/-ae' 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Comici'
 'Philosophici/-ae' 'Historici/-ae' 'Scriptores Ecclesiastici'
 'Historici/-ae' 'Comici' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Tragici' 'Philosophici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Scriptores Ecclesiastici' 'Historici/-ae' 'Comici'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Comici' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae' 'Comici'
 'Historici/-ae' 'Historici/-ae' 'Tragici' 'Philosophici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Comici' 'Philosophici/-ae' 'Comici' 'Tragici'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Tragici' 'Philosophici/-ae' 'Historici/-ae' 'Comici' 'Tragici'
 'Historici/-ae' 'Epici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Medici'
 'Comici' 'Comici' 'Comici' 'Epici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Historici/-ae' 'Comici' 'Philosophici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Tragici' 'Comici' 'Poetae'
 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae' 'Comici'
 'Philosophici/-ae' 'Tragici' 'Tragici' 'Historici/-ae' 'Tragici' 'Comici'
 'Philosophici/-ae' 'Tragici' 'Scriptores Ecclesiastici' 'Epici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Tragici' 'Historici/-ae' 'Historici/-ae'
 'Tragici' 'Tragici' 'Tragici' 'Historici/-ae' 'Historici/-ae' 'Comici'
 'Historici/-ae' 'Historici/-ae' 'Scriptores Ecclesiastici'
 'Philosophici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Comici' 'Philosophici/-ae' 'Historici/-ae'
 'Historici/-ae' 'Historici/-ae' 'Comici' 'Historici/-ae'
 'Philosophici/-ae' 'Tragici' 'Historici/-ae' 'Rhetorici' 'Historici/-ae'
 'Comici' 'Tragici' 'Historici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Philosophici/-ae' 'Comici' 'Historici/-ae'
 'Philosophici/-ae' 'Historici/-ae' 'Historici/-ae' 'Historici/-ae'
 'Philosophici/-ae' 'Comici' 'Philosophici/-ae' 'Comici' 'Historici/-ae'
 'Tragici' 'Comici' 'Historici/-ae' 'Tragici' 'Comici' 'Historici/-ae'
 'Tragici' 'Historici/-ae' 'Medici' 'Comici' 'Historici/-ae'
 'Historici/-ae' 'Philosophici/-ae' 'Historici/-ae' 'Philosophici/-ae'
 'Comici' 'Historici/-ae' 'Philosophici/-ae' 'Lyrici/-ae' 'Historici/-ae'
 'Tragici' 'Historici/-ae' 'Tragici' 'Tragici' 'Tragici' 'Comici'
 'Historici/-ae' 'Comici' 'Comici' 'Historici/-ae' 'Comici' 'Historici/-ae'
 'Historici/-ae' 'Tragici' 'Philosophici/-ae']
actual_values    7               Philosophici/-ae
1108    Scriptores Ecclesiastici
67                        Comici
1233            Philosophici/-ae
238                       Comici
23      Scriptores Ecclesiastici
1228                   Rhetorici
514             Philosophici/-ae
467                   Lyrici/-ae
470                   Theologici
1291               Historici/-ae
1399                    Elegiaci
1163                     Tragici
1505                      Comici
1370                 Alchemistae
657                    Epici/-ae
314             Philosophici/-ae
1461                      Comici
104                      Tragici
695                Historici/-ae
599             Philosophici/-ae
161                       Comici
949                   Theologici
753                       Comici
1460                      Medici
267                       Comici
1586                      Comici
1570                 Alchemistae
929                      Tragici
811                       Comici
                  ...           
1199               Historici/-ae
503                   Lyrici/-ae
871                    Rhetorici
673                       Medici
100                       Comici
593                Historici/-ae
1005               Historici/-ae
952             Philosophici/-ae
1077    Scriptores Ecclesiastici
622             Philosophici/-ae
1113                      Comici
944                    Epici/-ae
139                      Tragici
1568                    Elegiaci
316                Historici/-ae
512                       Comici
342                Historici/-ae
1419               Historici/-ae
351                       Comici
1400                      Comici
688                       Comici
159                    Rhetorici
1339                      Comici
940                       Comici
1253               Historici/-ae
1338                   Epici/-ae
1492                     Tragici
981                Historici/-ae
1136                     Tragici
1094                 Alchemistae
Name: epithet, dtype: object

----Random forest report--------------------------------
                          precision    recall  f1-score   support

             Alchemistae       0.00      0.00      0.00         7
                  Comici       0.82      0.63      0.71        51
                Elegiaci       0.00      0.00      0.00         8
               Epici/-ae       0.40      0.11      0.17        19
              Grammatici       0.75      0.15      0.25        20
           Historici/-ae       0.59      0.89      0.71        84
              Lyrici/-ae       0.00      0.00      0.00        12
                  Medici       1.00      0.29      0.44         7
        Philosophici/-ae       0.47      0.74      0.58        46
                  Poetae       0.00      0.00      0.00         7
               Rhetorici       1.00      0.17      0.29        12
Scriptores Ecclesiastici       0.50      0.38      0.43        13
               Sophistae       0.00      0.00      0.00         8
              Theologici       0.00      0.00      0.00         5
                 Tragici       0.22      0.73      0.34        15

             avg / total       0.52      0.53      0.47       314

/root/venv/lib/python3.5/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

SVC


In [ ]:
def run_svc(X_train_scaled, X_test_scaled, Y_train, Y_test):
    """Run SVC with scikit."""
    # This is where we define the models with pre-defined parameters
    # We can learn these parameters given our data
    print('Defining and fitting SVC model ...')
    t0 = dt.datetime.utcnow()   
    scv = svm.LinearSVC(C=100.)

    scv.fit(X_train_scaled, Y_train)

    fp_model_pickle = os.path.expanduser('~/cltk_data/user_data/tlg_bow_svc.pickle')
    joblib.dump(scv, fp_model_pickle)

    print('... finished in {} secs.'.format(dt.datetime.utcnow() - t0))
    print()
    

    Y_prediction_svc = scv.predict(X_test_scaled)
    print('svc_predictions ', Y_prediction_svc)

    expected = Y_test
    print('actual_values   ', expected)


    print()
    print('----SVC_report--------------------------------')
    print(classification_report(expected, Y_prediction_svc))

In [ ]:
run_svc(X_train_scaled, X_test_scaled, Y_train, Y_test)

ADA boost


In [ ]:
def run_ada_boost(X_train_scaled, X_test_scaled, Y_train, Y_test):
    """Scikit random forest.
    
    For plotting see:
    http://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_iris.html
    
    Experiment with 'n_estimators'
    """
    
    n_estimators = 30
    ada_classifier = AdaBoostClassifier(DecisionTreeClassifier(max_depth=3),
                                        n_estimators=n_estimators)

    # Train
    clf = clone(ada_classifier)
    clf = ada_classifier.fit(X_train_scaled, Y_train)
    
    fp_model_pickle = os.path.expanduser('~/cltk_data/user_data/tlg_bow_ada_boost.pickle')
    joblib.dump(clf, fp_model_pickle)
    
    scores = clf.score(X_train_scaled, Y_train)
    
    
    
    Y_prediction = clf.predict(X_test_scaled)
    print('tree_predictions ', Y_prediction)

    expected = Y_test
    print('actual_values   ', expected)


    print()
    print(classification_report(expected, Y_prediction))

In [ ]:
run_ada_boost(X_train_scaled, X_test_scaled, Y_train, Y_test)